{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torchvision\n",
    "import torch.optim as optim\n",
    "import torch\n",
    "import torch.nn as nn\n",
    "from torch.utils.data import DataLoader\n",
    "from train_densenet import *\n",
    "from data_utils import *\n",
    "from math import ceil\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DenseNet(\n",
       "  (features): Sequential(\n",
       "    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)\n",
       "    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "    (relu0): ReLU(inplace=True)\n",
       "    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)\n",
       "    (denseblock1): _DenseBlock(\n",
       "      (denselayer1): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer2): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(96, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer3): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer4): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(160, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer5): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer6): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(224, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "    )\n",
       "    (transition1): _Transition(\n",
       "      (norm): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "      (relu): ReLU(inplace=True)\n",
       "      (conv): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "      (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)\n",
       "    )\n",
       "    (denseblock2): _DenseBlock(\n",
       "      (denselayer1): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer2): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(160, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer3): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(192, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer4): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(224, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer5): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer6): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(288, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(288, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer7): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(320, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer8): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(352, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(352, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer9): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer10): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(416, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(416, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer11): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(448, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(448, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer12): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(480, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "    )\n",
       "    (transition2): _Transition(\n",
       "      (norm): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "      (relu): ReLU(inplace=True)\n",
       "      (conv): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "      (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)\n",
       "    )\n",
       "    (denseblock3): _DenseBlock(\n",
       "      (denselayer1): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer2): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(288, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(288, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer3): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(320, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer4): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(352, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(352, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer5): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(384, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer6): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(416, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(416, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer7): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(448, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(448, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer8): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(480, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(480, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer9): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer10): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(544, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(544, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer11): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(576, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer12): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(608, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(608, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer13): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(640, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer14): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(672, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer15): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(704, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(704, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer16): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(736, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(736, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer17): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(768, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer18): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(800, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(800, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer19): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(832, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(832, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer20): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(864, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(864, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer21): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(896, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(896, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer22): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(928, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(928, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer23): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(960, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer24): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(992, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(992, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "    )\n",
       "    (transition3): _Transition(\n",
       "      (norm): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "      (relu): ReLU(inplace=True)\n",
       "      (conv): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "      (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)\n",
       "    )\n",
       "    (denseblock4): _DenseBlock(\n",
       "      (denselayer1): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer2): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(544, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(544, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer3): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(576, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer4): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(608, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(608, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer5): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(640, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(640, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer6): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(672, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(672, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer7): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(704, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(704, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer8): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(736, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(736, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer9): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(768, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(768, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer10): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(800, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(800, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer11): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(832, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(832, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer12): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(864, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(864, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer13): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(896, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(896, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer14): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(928, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(928, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer15): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(960, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "      (denselayer16): _DenseLayer(\n",
       "        (norm1): BatchNorm2d(992, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu1): ReLU(inplace=True)\n",
       "        (conv1): Conv2d(992, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)\n",
       "        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "        (relu2): ReLU(inplace=True)\n",
       "        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)\n",
       "      )\n",
       "    )\n",
       "    (norm5): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n",
       "  )\n",
       "  (classifier): Linear(in_features=1024, out_features=1000, bias=True)\n",
       ")"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "torch.backends.cudnn.benchmark = True\n",
    "densenet = torchvision.models.densenet121(pretrained=True)\n",
    "densenet.cuda()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "densenet.classifier.out_features = 257"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def weights_init(m):\n",
    "    \"\"\"\n",
    "    用于初始化模型的权值和偏置\n",
    "    \"\"\"\n",
    "    classname = m.__class__.__name__\n",
    "    if classname.find('Conv') != -1:\n",
    "        m.weight.data.normal_(0.0, 0.02)\n",
    "    elif classname.find('BatchNorm') != -1:\n",
    "        m.weight.data.normal_(1.0, 0.02)\n",
    "        m.bias.data.fill_(0)\n",
    "    elif classname.find('Linear') != -1:\n",
    "        m.weight.data.normal_(0.0, 0.02)\n",
    "        if m.bias is not None:\n",
    "            m.bias.data.fill_(0.0)\n",
    "\n",
    "# densenet.apply(weights_init)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "classifier_weights = [densenet.classifier.weight]\n",
    "classifier_biases = [densenet.classifier.bias]\n",
    "features_weights = [\n",
    "    p for n, p in densenet.named_parameters()\n",
    "    if 'conv' in n\n",
    "]\n",
    "# features_weights += [\n",
    "#     p for n, p in densenet.named_parameters()\n",
    "#     if 'downsample.0' in n and 'weight' in n\n",
    "# ]\n",
    "features_bn_weights = [\n",
    "    p for n, p in densenet.named_parameters()\n",
    "    if 'weight' in n and 'norm' in n\n",
    "]\n",
    "features_bn_biases = [\n",
    "    p for n, p in densenet.named_parameters()\n",
    "    if 'bias' in n and 'norm' in n\n",
    "]\n",
    "\n",
    "# you can set different learning rates\n",
    "classifier_lr = 1e-2\n",
    "features_lr = 1e-2\n",
    "# but they are not actually used (because lr_scheduler is used)\n",
    "\n",
    "params = [\n",
    "    {'params': classifier_weights, 'lr': classifier_lr, 'weight_decay': 1e-3},\n",
    "    {'params': classifier_biases, 'lr': classifier_lr},\n",
    "    {'params': features_weights, 'lr': features_lr, 'weight_decay': 1e-3},\n",
    "    {'params': features_bn_weights, 'lr': features_lr},\n",
    "    {'params': features_bn_biases, 'lr': features_lr}\n",
    "]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# param = []\n",
    "# for p in densenet.parameters():\n",
    "#     if p.requires_grad == True:\n",
    "#         param.append(p)\n",
    "\n",
    "optimizer = optim.SGD(params, momentum=0.9, nesterov=True)\n",
    "\n",
    "criterion = nn.CrossEntropyLoss()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "24387\n"
     ]
    }
   ],
   "source": [
    "batch_size = 16\n",
    "train_folder, val_folder = get_folders()\n",
    "\n",
    "train_iterator = DataLoader(\n",
    "    train_folder, batch_size=batch_size, num_workers=0,\n",
    "    shuffle=True, pin_memory=False\n",
    ")\n",
    "\n",
    "val_iterator = DataLoader(\n",
    "    val_folder, batch_size=16, num_workers=0,\n",
    "    shuffle=False, pin_memory=False\n",
    ")\n",
    "\n",
    "# number of training samples\n",
    "train_size = len(train_folder.imgs)\n",
    "print(train_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1525\n"
     ]
    }
   ],
   "source": [
    "n_epochs = 50\n",
    "n_batches = ceil(train_size/batch_size)\n",
    "print(n_batches)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# cyclical cosine annealing\n",
    "# it changes the learning rate on every optimization step\n",
    "# 1e-6 is the minimal learning rate\n",
    "M = 1 \n",
    "# total number of optimization steps\n",
    "T = n_batches*n_epochs \n",
    "# initial learning rates\n",
    "initial1 = 1e-2\n",
    "initial2 = 1e-3\n",
    "\n",
    "\n",
    "def lr_scheduler(optimizer, step):\n",
    "    \n",
    "    global initial1\n",
    "    global initial2\n",
    "    decay = np.cos(np.pi*((step - 1) % (T // M))/(T // M)) + 1.0\n",
    "    \n",
    "    # params of the last fc layer\n",
    "    for param_group in optimizer.param_groups[:2]:\n",
    "        param_group['lr'] = ((initial1 - 1e-6)*decay/2.0) + 1e-6\n",
    "    \n",
    "    # params of the last two densenet blocks\n",
    "    for param_group in optimizer.param_groups[2:]:\n",
    "        param_group['lr'] = ((initial2 - 1e-6)*decay/2.0) + 1e-6\n",
    "    \n",
    "    if (step - 1) % (T // M) == 0 and step != 1:\n",
    "        print('lr is reset')\n",
    "        \n",
    "    return optimizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "validation_step: 1.00  avg_train_loss: 3.076  avg_val_loss: 1.979  avg_train_acc: 0.390  avg_val_acc: 0.551  time_per_val_step: 374.839\n",
      "saving model for epoch 0.\n",
      "validation_step: 2.00  avg_train_loss: 1.517  avg_val_loss: 1.619  avg_train_acc: 0.638  avg_val_acc: 0.632  time_per_val_step: 387.183\n",
      "validation_step: 3.00  avg_train_loss: 1.192  avg_val_loss: 1.427  avg_train_acc: 0.707  avg_val_acc: 0.666  time_per_val_step: 389.268\n",
      "validation_step: 4.00  avg_train_loss: 1.009  avg_val_loss: 1.438  avg_train_acc: 0.749  avg_val_acc: 0.676  time_per_val_step: 390.214\n",
      "validation_step: 5.00  avg_train_loss: 0.907  avg_val_loss: 1.445  avg_train_acc: 0.772  avg_val_acc: 0.692  time_per_val_step: 365.332\n",
      "validation_step: 6.00  avg_train_loss: 0.816  avg_val_loss: 1.329  avg_train_acc: 0.792  avg_val_acc: 0.704  time_per_val_step: 362.921\n",
      "saving model for epoch 5.\n",
      "validation_step: 7.00  avg_train_loss: 0.752  avg_val_loss: 1.328  avg_train_acc: 0.808  avg_val_acc: 0.715  time_per_val_step: 334.168\n",
      "validation_step: 8.00  avg_train_loss: 0.665  avg_val_loss: 1.295  avg_train_acc: 0.828  avg_val_acc: 0.725  time_per_val_step: 317.023\n",
      "validation_step: 9.00  avg_train_loss: 0.617  avg_val_loss: 1.222  avg_train_acc: 0.841  avg_val_acc: 0.725  time_per_val_step: 321.487\n",
      "validation_step: 10.00  avg_train_loss: 0.570  avg_val_loss: 1.322  avg_train_acc: 0.850  avg_val_acc: 0.719  time_per_val_step: 320.174\n",
      "validation_step: 11.00  avg_train_loss: 0.549  avg_val_loss: 1.203  avg_train_acc: 0.859  avg_val_acc: 0.746  time_per_val_step: 371.260\n",
      "saving model for epoch 10.\n",
      "validation_step: 12.00  avg_train_loss: 0.481  avg_val_loss: 1.260  avg_train_acc: 0.876  avg_val_acc: 0.731  time_per_val_step: 350.351\n",
      "validation_step: 13.00  avg_train_loss: 0.446  avg_val_loss: 1.210  avg_train_acc: 0.886  avg_val_acc: 0.731  time_per_val_step: 323.405\n",
      "validation_step: 14.00  avg_train_loss: 0.422  avg_val_loss: 1.208  avg_train_acc: 0.894  avg_val_acc: 0.738  time_per_val_step: 319.176\n",
      "validation_step: 15.00  avg_train_loss: 0.397  avg_val_loss: 1.187  avg_train_acc: 0.900  avg_val_acc: 0.744  time_per_val_step: 357.687\n",
      "validation_step: 16.00  avg_train_loss: 0.365  avg_val_loss: 1.241  avg_train_acc: 0.909  avg_val_acc: 0.739  time_per_val_step: 316.798\n",
      "saving model for epoch 15.\n",
      "validation_step: 17.00  avg_train_loss: 0.330  avg_val_loss: 1.150  avg_train_acc: 0.919  avg_val_acc: 0.752  time_per_val_step: 318.712\n",
      "validation_step: 18.00  avg_train_loss: 0.311  avg_val_loss: 1.148  avg_train_acc: 0.925  avg_val_acc: 0.745  time_per_val_step: 349.101\n",
      "validation_step: 19.00  avg_train_loss: 0.288  avg_val_loss: 1.108  avg_train_acc: 0.931  avg_val_acc: 0.759  time_per_val_step: 371.038\n",
      "validation_step: 20.00  avg_train_loss: 0.280  avg_val_loss: 1.085  avg_train_acc: 0.935  avg_val_acc: 0.772  time_per_val_step: 370.677\n",
      "validation_step: 21.00  avg_train_loss: 0.249  avg_val_loss: 1.134  avg_train_acc: 0.944  avg_val_acc: 0.754  time_per_val_step: 317.595\n",
      "saving model for epoch 20.\n",
      "validation_step: 22.00  avg_train_loss: 0.240  avg_val_loss: 1.163  avg_train_acc: 0.945  avg_val_acc: 0.748  time_per_val_step: 309.903\n",
      "validation_step: 23.00  avg_train_loss: 0.219  avg_val_loss: 1.161  avg_train_acc: 0.953  avg_val_acc: 0.760  time_per_val_step: 312.276\n",
      "validation_step: 24.00  avg_train_loss: 0.209  avg_val_loss: 1.162  avg_train_acc: 0.954  avg_val_acc: 0.753  time_per_val_step: 319.296\n",
      "validation_step: 25.00  avg_train_loss: 0.184  avg_val_loss: 1.024  avg_train_acc: 0.962  avg_val_acc: 0.774  time_per_val_step: 320.261\n",
      "validation_step: 26.00  avg_train_loss: 0.173  avg_val_loss: 0.935  avg_train_acc: 0.967  avg_val_acc: 0.790  time_per_val_step: 327.777\n",
      "saving model for epoch 25.\n",
      "validation_step: 27.00  avg_train_loss: 0.159  avg_val_loss: 1.021  avg_train_acc: 0.970  avg_val_acc: 0.781  time_per_val_step: 327.258\n",
      "validation_step: 28.00  avg_train_loss: 0.158  avg_val_loss: 1.054  avg_train_acc: 0.970  avg_val_acc: 0.778  time_per_val_step: 324.194\n",
      "validation_step: 29.00  avg_train_loss: 0.152  avg_val_loss: 1.003  avg_train_acc: 0.971  avg_val_acc: 0.779  time_per_val_step: 321.849\n",
      "validation_step: 30.00  avg_train_loss: 0.142  avg_val_loss: 1.008  avg_train_acc: 0.974  avg_val_acc: 0.784  time_per_val_step: 332.472\n",
      "validation_step: 31.00  avg_train_loss: 0.136  avg_val_loss: 0.992  avg_train_acc: 0.976  avg_val_acc: 0.780  time_per_val_step: 335.676\n",
      "saving model for epoch 30.\n",
      "validation_step: 32.00  avg_train_loss: 0.124  avg_val_loss: 0.977  avg_train_acc: 0.979  avg_val_acc: 0.782  time_per_val_step: 324.010\n",
      "validation_step: 33.00  avg_train_loss: 0.115  avg_val_loss: 1.048  avg_train_acc: 0.982  avg_val_acc: 0.773  time_per_val_step: 322.407\n",
      "validation_step: 34.00  avg_train_loss: 0.116  avg_val_loss: 1.010  avg_train_acc: 0.981  avg_val_acc: 0.772  time_per_val_step: 315.948\n",
      "validation_step: 35.00  avg_train_loss: 0.106  avg_val_loss: 0.984  avg_train_acc: 0.984  avg_val_acc: 0.799  time_per_val_step: 327.520\n",
      "validation_step: 36.00  avg_train_loss: 0.105  avg_val_loss: 0.938  avg_train_acc: 0.984  avg_val_acc: 0.789  time_per_val_step: 367.053\n",
      "saving model for epoch 35.\n",
      "validation_step: 37.00  avg_train_loss: 0.101  avg_val_loss: 0.996  avg_train_acc: 0.985  avg_val_acc: 0.782  time_per_val_step: 396.758\n",
      "validation_step: 38.00  avg_train_loss: 0.095  avg_val_loss: 1.015  avg_train_acc: 0.987  avg_val_acc: 0.791  time_per_val_step: 356.036\n",
      "validation_step: 39.00  avg_train_loss: 0.092  avg_val_loss: 0.955  avg_train_acc: 0.987  avg_val_acc: 0.798  time_per_val_step: 347.604\n",
      "validation_step: 40.00  avg_train_loss: 0.094  avg_val_loss: 0.958  avg_train_acc: 0.987  avg_val_acc: 0.784  time_per_val_step: 347.381\n",
      "validation_step: 41.00  avg_train_loss: 0.090  avg_val_loss: 0.917  avg_train_acc: 0.988  avg_val_acc: 0.791  time_per_val_step: 335.614\n",
      "saving model for epoch 40.\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-13-85c81c97194b>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m      3\u001b[0m     \u001b[0mtrain_iterator\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mn_epochs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mn_batches\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m      4\u001b[0m     \u001b[0mval_iterator\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalidation_step\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m1525\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mn_validation_batches\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m80\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 5\u001b[1;33m     \u001b[0msaving_epoch\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m5\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlr_scheduler\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlr_scheduler\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      6\u001b[0m )\n",
      "\u001b[1;32md:\\A-DATA_STORAGE\\大三上学期\\CV\\实验5\\train_densenet.py\u001b[0m in \u001b[0;36mtrain\u001b[1;34m(model, criterion, optimizer, train_iterator, n_epochs, n_batches, val_iterator, validation_step, n_validation_batches, saving_epoch, lr_scheduler)\u001b[0m\n\u001b[0;32m    113\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    114\u001b[0m             batch_loss, batch_accuracy = optimization_step(\n\u001b[1;32m--> 115\u001b[1;33m                 \u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcriterion\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mx_batch\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_batch\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    116\u001b[0m             )\n\u001b[0;32m    117\u001b[0m             \u001b[0mrunning_loss\u001b[0m \u001b[1;33m+=\u001b[0m \u001b[0mbatch_loss\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32md:\\A-DATA_STORAGE\\大三上学期\\CV\\实验5\\train_densenet.py\u001b[0m in \u001b[0;36moptimization_step\u001b[1;34m(model, criterion, optimizer, x_batch, y_batch)\u001b[0m\n\u001b[0;32m     80\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     81\u001b[0m     \u001b[0moptimizer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mzero_grad\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 82\u001b[1;33m     \u001b[0mloss\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     83\u001b[0m     \u001b[0moptimizer\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mstep\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     84\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\torch\\_tensor.py\u001b[0m in \u001b[0;36mbackward\u001b[1;34m(self, gradient, retain_graph, create_graph, inputs)\u001b[0m\n\u001b[0;32m    253\u001b[0m                 \u001b[0mcreate_graph\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mcreate_graph\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    254\u001b[0m                 inputs=inputs)\n\u001b[1;32m--> 255\u001b[1;33m         \u001b[0mtorch\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mautograd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mbackward\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mgradient\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0minputs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    256\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    257\u001b[0m     \u001b[1;32mdef\u001b[0m \u001b[0mregister_hook\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mhook\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python37\\lib\\site-packages\\torch\\autograd\\__init__.py\u001b[0m in \u001b[0;36mbackward\u001b[1;34m(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)\u001b[0m\n\u001b[0;32m    147\u001b[0m     Variable._execution_engine.run_backward(\n\u001b[0;32m    148\u001b[0m         \u001b[0mtensors\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mgrad_tensors_\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mretain_graph\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcreate_graph\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 149\u001b[1;33m         allow_unreachable=True, accumulate_grad=True)  # allow_unreachable flag\n\u001b[0m\u001b[0;32m    150\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    151\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "all_losses = train(\n",
    "    densenet, criterion, optimizer, \n",
    "    train_iterator, n_epochs, n_batches, \n",
    "    val_iterator, validation_step=1525, n_validation_batches=80, \n",
    "    saving_epoch=5, lr_scheduler=lr_scheduler\n",
    ")\n"
   ]
  }
 ],
 "metadata": {
  "interpreter": {
   "hash": "494899efd6527d56ea7f55c588d0081523a17dc3a9ff1107f3394ad815ff2527"
  },
  "kernelspec": {
   "display_name": "Python 3.7.7 64-bit",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
